* Project      :  The Full, Persistent, and Symmetric Pass-Through of a Temporary VAT Cut
* Authors      :  Márcia Silva-Pereira, João Quelhas, Tiago Bernardino, Ricardo Duque Gabriel
* Date         :  01/04/2025
* Description  :  SC DiD Regression
* Dependencies :
* Modifications: (add date, author and change)

*********************************************************
*                   SC DiD Regression                   *
*********************************************************
	
* FIGURE C.2: Robustness: Synthetic Difference-in-Difference (A) Policy Implementation
*********************************************************

// Load the cleaned dataset.
use "${path_work}/auxiliar/imputation_0_only_food.dta", clear

// Set the panel structure.
xtset id_sm date, daily

// Create a new variable 'total' containing the count of daily observations for each 'id_sm'.
bys id_sm: gen total = _N

// Summarize the 'total' variable to find the maximum count of daily observations.
sum total

// Keep only the observations where 'total' is between 410 and 426.
keep if total > 410 & total < 426

// Set the panel structure.
xtset id_sm date, daily

// Imputation.
tsfill

// Save the relevant variables.
keep id_sm id date name supermarket price treatment ecoicop5 five_day_number weight

// Loop through the variables and replace missing values

foreach v of varlist id date name supermarket price treatment ecoicop5 five_day_number weight {
			
	// Replace missing values with the previous value if conditions are met.
		
	by id_sm (date): replace `v' = `v'[_n-1] if missing(`v') & _n > 1 & !missing(`v'[_n-1]) & _n < _N
}

// Set the panel structure.
xtset id_sm date, daily

// Create a new variable 'total' containing the count of daily observations for each 'id_sm'.
bys id_sm: gen total = _N

// Summarize the 'total' variable to find the maximum count of daily observations.
sum total

// Keep only the observations where 'total' is equal to the maximum count (r(max)).
keep if total == r(max)

collapse (${aggregation}) ${variable} (first) ecoicop5 treatment weight, by (id_sm five_day_number)

// Compute a price index with base in the day before policy announcement I.
local   base_date 22
egen    base_price_23032023 = total(${variable}/ (five_day_number == `base_date')), by(id_sm)
gen     index_23032023 = (${variable}/ base_price_23032023) * 100

xtset id_sm five_day_number

// Keep the data around the event.
keep if five_day_number >= 19
keep if five_day_number <= 30

// Policy variable: 1 after the reference date.
replace treatment = 0 if five_day_number < 23

// Keep only the specified variables: id_sm, date, index_17042023, and treatment.
keep id_sm five_day_number index_23032023 treatment

// Set the panel structure.
xtset id_sm five_day_number

// Create a new variable 'total' containing the count of daily observations for each 'id_sm'.
bys id_sm: gen total = _N

// Summarize the 'total' variable to find the maximum count of daily observations.
sum total

// Keep only the observations where 'total' is equal to the maximum count (r(max)).
keep if total == r(max)

unique id_sm
unique five_day_number

// Run the synthetic control DiD regression.
qui sdid index_23032023 id_sm five_day_number treatment, vce(noinference) graph

// BRIEF DESCRIPTION OF THE COMMAND:
// sdid depvar groupvar timevar treatment [if] [in] , vce(type) 
// [ covariates(varlist, type) seed(#) reps(#) method(type) 
// graph g1on g1 opt(string) g2 opt(string) graph export(string, type) 
// msize(markersizestyle) unstandardized mattitles ]
// depvar describes the dependent variable in a balanced panel of units (groupvar) and time periods (timevar)
// The variable which indicates units which are treated at each time period, which accumulates over time is treatment.
// Note that here, it is not necessary for users to specify whether the design is a block or staggered
// adoption design, as this will be inferred based off the data structure.
// vce(type) -> bootstrap, jackknife, placebo or noinference
// graph if this option is specified, graphs will be displayed showing unit and time weights
// as well as outcome trends as per Figure 1 from Arkhangelsky et al. (2021).
// g1on this option activates the unit-specific weight graph. By default g1 is off.
// Observations with zero weight are denoted by an X symbol.

// Weight Graphs

// (1) Save time weights which are used to calculate DID estimates.
// Lambda weights are time-specific weights.

preserve

// Store the variables lambda1 and lambda2 in a matrix named lambda.
clear

matrix lambda = e(lambda)

// Save the matrix as a Stata dataset.
svmat lambda

// Rename the variables lambda1 and lambda2 to lambda and date respectively.
ren (lambda1 lambda2) (lambda five_day_number)

// Keep only the observations where the date is on or before April 18, 2023.
keep if five_day_number < 23

// Create a temporary file to store the filtered dataset.
tempfile dlambda

// Save the filtered dataset in the temporary file.
save `dlambda'

restore

// (2) Save unit weights which are used to calculate DID estimates.
// Omega weights are unit-specific weights

preserve

// Store the variable omega in a matrix.
clear
matrix omega = e(omega)  

// Save the matrix as a dataset.
svmat omega

// Rename the variables for clarity.
ren (omega1 omega2) (omega id_sm)

 // Create a temporary file to store the dataset.
tempfile domega

// Save the dataset in the temporary file.
save `domega'

restore

// (4) Event-study analysis

// Pre-treatment periods: 17
// Post-treatment periods: 8

// For each period t, we wish to consider whether differences between treated units and
// synthetic controls have changed when compared to baseline differences.

// Save lambda weight.
matrix lambda    = e(lambda)[1..4,1]          

// Control baseline.
matrix yco       = e(series)[1..4,2]         

// Treated baseline.
matrix ytr       = e(series)[1..4,3]         

 // Calculate the pre-treatment mean.
matrix aux       = lambda'*(ytr - yco)       

// Create a scalar.
scalar meanpre_o = aux[1,1]

// The quantity of interest for each time period t is generated as
// the variable d, which is plotted below as the blue points on the event study.

// Store Ytr-Yco.
matrix difference = e(difference)[1..12,1..2] 

// Save the matrix as a dataset.
svmat difference

// Rename the variables.
ren (difference1 difference2) (time d)

// Calculate vector d.
replace d = d - meanpre_o 					

// Bootstrap procedure for confidence interval

// Number of the first bootstrap.
local b = 1
// Total number of bootstraps.
local B = 1000

// In each bootstrap resample, we first ensure that both treatment and control units are
// present (using the locals r1 and r2), and then re-estimate the sdid procedure with the
// new bootstrap sample generated using Stata's bsample command.

// To do so, we simply follow an identical procedure as that conducted above, 
// however now save the resulting resampled values of the quantities
// as a series of matrices d`b' for later processing to generate
// confidence intervals in the event study.

// Loop over clusters from b=1 to B
while `b' <= `B' {                   
    
	preserve

    // Run the balancing test
    bsample, cluster(id_sm) idcluster(c2)
    qui count if treatment == 0
    local r1 = r(N)
    qui count if treatment != 0
    local r2 = r(N)

    // Check if both treated and control groups have non-zero observations
    if (`r1' != 0 & `r2' != 0) {

        // Run the SDID estimation and store relevant results.
        qui sdid index_23032023 c2 five_day_number treatment, vce(noinference) graph
        matrix lambda_b  = e(lambda)[1..4, 1]          // Save lambda weight.
        matrix yco_b     = e(series)[1..4, 2]          // Control baseline.
        matrix ytr_b     = e(series)[1..4, 3]          // Treated baseline.
        matrix aux_b     = lambda_b'*(ytr_b - yco_b)    // Calculate the pre-treatment mean.
        matrix meanpre_b = J(12, 1, aux_b[1, 1])
        matrix d`b'      = e(difference)[1..12, 2] - meanpre_b
		
		// Increment the cluster counter.
        local ++b                                    
    }

    restore                             
}

// The final step is to calculate the standard deviation of each estimate based
// on the bootstrap resamples, and then to generate confidence intervals for each parameter
// based on the estimates generated above (d), as well as their standard errors.

// Save the current dataset state
preserve

// Keep only the relevant variables 'time' and 'd'
keep time d

// Remove observations where 'time' is missing
keep if time != .

// Number of the first bootstrap.
local b = 1
// Total number of bootstraps.
local B = 1000

// Loop over clusters from b=1 to `B'
forval b = 1/`B' {

    // Load the saved matrix d`b' (assuming it was previously saved using svmat)
    svmat d`b'

}

// Calculate the standard deviation of the differences between d11 and d`B'1
egen rsd = rowsd(d11 - d`B'1)

// Calculate lower bounds on bootstrap CIs
gen LCI = d + invnormal(0.025)*rsd

// Calculate upper bounds on bootstrap CIs
gen UCI = d + invnormal(0.975)*rsd

grstyle init
grstyle set plain, nogrid box noextend
grstyle graphsize x 14
grstyle graphsize y 8
grstyle color background white

// Make the plot with the results.
twoway  rarea UCI LCI time, color("dknavy*0.2") || scatter d time, 		///
	color("dknavy") m(circle) recast(connected) ytitle("Percentage change") ///
	legend(off) xscale(lcolor(black) lwidth(none) line)			///
	ylabel(-10(2)2) yline(0, lc(black) lp(dash) lw(medthin)) 		///
	yline(-5.66, lc("dknavy") lp(dash) lwidth(medthin))  			///
	text(-6.2 18.2 "100% Pass-Through", color("dknavy") placement(e)) 	///
	tlabel(18 " " 19 "-4" 20 "-3" 21 "-2" 22 "-1" 23 "0"  			///
	24 "1" 25 "2" 26 "3" 27 "4" 28 "5" 29 "6" 30 "7" 31 " ")		///
	graphregion(fcolor(white) lcolor(black)) 				///
	yscale(lcolor(black) lwidth(none) line) yscale(lcolor(black)  		///
	lwidth(none) line) xtitle("5-day Periods to Event") 			///
	xline(23, lcolor("gray*0.5") lpattern(solid) lwidth(medthin)) 		///
	text(-9.5 23.1 "Announcement", color("gray") placement(e)) 		///
	xline(28, lcolor("gray*0.5") lpattern(solid) lwidth(medthin)) 		///
	text(-9.5 28.1 "Implementation", color("gray") placement(e))	
	
// Export the graph.
graph export "${results_figures}/figure_c2a.png", as(png) replace width(2400) height(1372)

restore

*********************************************************

	
* FIGURE C.2: Robustness: Synthetic Difference-in-Difference (B) Policy Reversal
*********************************************************

// Load the cleaned dataset.
use "${path_work}/auxiliar/imputation_0_only_food.dta", clear

// Set the panel structure.
xtset id_sm date, daily

// Create a new variable 'total' containing the count of daily observations for each 'id_sm'.
bys id_sm: gen total = _N

// Summarize the 'total' variable to find the maximum count of daily observations.
sum total

// Keep only the observations where 'total' is between 410 and 426.
keep if total > 410 & total < 426

// Set the panel structure.
xtset id_sm date, daily

// Imputation.
tsfill

// Save the relevant variables.
keep id_sm id date name supermarket price treatment ecoicop5 five_day_number weight

// Loop through the variables and replace missing values

foreach v of varlist id date name supermarket price treatment ecoicop5 five_day_number weight {
			
	// Replace missing values with the previous value if conditions are met.
		
	by id_sm (date): replace `v' = `v'[_n-1] if missing(`v') & _n > 1 & !missing(`v'[_n-1]) & _n < _N
}

// Set the panel structure using 'xtset' with 'id_sm' as the cross-sectional identifier and 'date' as the time variable.
xtset id_sm date, daily

// Create a new variable 'total' containing the count of daily observations for each 'id_sm'.
bys id_sm: gen total = _N

// Summarize the 'total' variable to find the maximum count of daily observations.
sum total

// Keep only the observations where 'total' is equal to the maximum count (r(max)).
keep if total == r(max)

collapse (${aggregation}) ${variable} (first) ecoicop5 treatment weight, by (id_sm five_day_number)

// Compute a price index with base in the day before policy announcement I.
local   base_date 65
egen    base_price_23032023 = total(${variable}/ (five_day_number == `base_date')), by(id_sm)
gen     index_23032023 = (${variable}/ base_price_23032023) * 100

xtset id_sm five_day_number

// Keep the data around the event.
keep if five_day_number >= 62
keep if five_day_number <= 82

// Policy variable: 1 after the reference date.
replace treatment = 0 if five_day_number < 66

// Set the data as a panel with daily observations.
// Keep only the specified variables: id_sm, date, index_17042023, and treatment.
keep id_sm five_day_number index_23032023 treatment

// Set the panel structure using 'xtset' with 'id_sm' as the cross-sectional identifier and 'date' as the time variable.
xtset id_sm five_day_number

// Create a new variable 'total' containing the count of daily observations for each 'id_sm'.
bys id_sm: gen total = _N

// Summarize the 'total' variable to find the maximum count of daily observations.
sum total

// Keep only the observations where 'total' is equal to the maximum count (r(max)).
keep if total == r(max)

egen treatment_total = total(treatment), by(id_sm)
keep if treatment_total == 0 | treatment_total == 17

unique id_sm
unique five_day_number

// Run the synthetic control DiD regression.
qui sdid index_23032023 id_sm five_day_number treatment, vce(noinference) graph

// BRIEF DESCRIPTION OF THE COMMAND:
// sdid depvar groupvar timevar treatment [if] [in] , vce(type) 
// [ covariates(varlist, type) seed(#) reps(#) method(type) 
// graph g1on g1 opt(string) g2 opt(string) graph export(string, type) 
// msize(markersizestyle) unstandardized mattitles ]
// depvar describes the dependent variable in a balanced panel of units (groupvar) and time periods (timevar)
// The variable which indicates units which are treated at each time period, which accumulates over time is treatment.
// Note that here, it is not necessary for users to specify whether the design is a block or staggered
// adoption design, as this will be inferred based off the data structure.
// vce(type) -> bootstrap, jackknife, placebo or noinference
// graph if this option is specified, graphs will be displayed showing unit and time weights
// as well as outcome trends as per Figure 1 from Arkhangelsky et al. (2021).
// g1on this option activates the unit-specific weight graph. By default g1 is off.
// Observations with zero weight are denoted by an X symbol.

// Weight Graphs

// (1) Save time weights which are used to calculate DID estimates.
// Lambda weights are time-specific weights.

preserve

// Store the variables lambda1 and lambda2 in a matrix named lambda.
clear

matrix lambda = e(lambda)

// Save the matrix as a Stata dataset.
svmat lambda

// Rename the variables lambda1 and lambda2 to lambda and date respectively.
ren (lambda1 lambda2) (lambda five_day_number)

// Keep only the observations where the date is on or before April 18, 2023.
keep if five_day_number < 66

// Create a temporary file to store the filtered dataset.
tempfile dlambda

// Save the filtered dataset in the temporary file.
save `dlambda'

restore

// (2) Save unit weights which are used to calculate DID estimates.
// Omega weights are unit-specific weights

preserve

// Store the variable omega in a matrix.
clear
matrix omega = e(omega)  

// Save the matrix as a dataset.
svmat omega

// Rename the variables for clarity.
ren (omega1 omega2) (omega id_sm)

 // Create a temporary file to store the dataset.
tempfile domega

// Save the dataset in the temporary file.
save `domega'

restore

// (4) Event-study analysis

// Pre-treatment periods: 4
// Post-treatment periods: 18

// For each period t, we wish to consider whether differences between treated units and
// synthetic controls have changed when compared to baseline differences.

// Save lambda weight.
matrix lambda    = e(lambda)[1..4,1]          

// Control baseline.
matrix yco       = e(series)[1..4,2]         

// Treated baseline.
matrix ytr       = e(series)[1..4,3]         

 // Calculate the pre-treatment mean.
matrix aux       = lambda'*(ytr - yco)       

// Create a scalar.
scalar meanpre_o = aux[1,1]

// The quantity of interest for each time period t is generated as
// the variable d, which is plotted below as the blue points on the event study.

// Store Ytr-Yco.
matrix difference = e(difference)[1..21,1..2] 

// Save the matrix as a dataset.
svmat difference

// Rename the variables.
ren (difference1 difference2) (time d)

// Calculate vector d.
replace d = d - meanpre_o 					

// Bootstrap procedure for confidence interval

// Number of the first bootstrap.
local b = 1
// Total number of bootstraps.
local B = 1000

// In each bootstrap resample, we first ensure that both treatment and control units are
// present (using the locals r1 and r2), and then re-estimate the sdid procedure with the
// new bootstrap sample generated using Stata's bsample command.

// To do so, we simply follow an identical procedure as that conducted above, 
// however now save the resulting resampled values of the quantities
// as a series of matrices d`b' for later processing to generate
// confidence intervals in the event study.

// Loop over clusters from b=1 to B
while `b' <= `B' {                   
    
	preserve

    // Run the balancing test
    bsample, cluster(id_sm) idcluster(c2)
    qui count if treatment == 0
    local r1 = r(N)
    qui count if treatment != 0
    local r2 = r(N)

    // Check if both treated and control groups have non-zero observations
    if (`r1' != 0 & `r2' != 0) {

        // Run the SDID estimation and store relevant results.
        qui sdid index_23032023 c2 five_day_number treatment, vce(noinference) graph
        matrix lambda_b  = e(lambda)[1..4, 1]          // Save lambda weight.
        matrix yco_b     = e(series)[1..4, 2]          // Control baseline.
        matrix ytr_b     = e(series)[1..4, 3]          // Treated baseline.
        matrix aux_b     = lambda_b'*(ytr_b - yco_b)    // Calculate the pre-treatment mean.
        matrix meanpre_b = J(21, 1, aux_b[1, 1])
        matrix d`b'      = e(difference)[1..21, 2] - meanpre_b
		
		// Increment the cluster counter.
        local ++b                                    
    }

    restore                             
}

// The final step is to calculate the standard deviation of each estimate based
// on the bootstrap resamples, and then to generate confidence intervals for each parameter
// based on the estimates generated above (d), as well as their standard errors.

// Save the current dataset state
preserve

// Keep only the relevant variables 'time' and 'd'
keep time d

// Remove observations where 'time' is missing
keep if time != .

// Number of the first bootstrap.
local b = 1
// Total number of bootstraps.
local B = 1000

// Loop over clusters from b=1 to `B'
forval b = 1/`B' {

    // Load the saved matrix d`b' (assuming it was previously saved using svmat)
    svmat d`b'

}

// Calculate the standard deviation of the differences between d11 and d`B'1
egen rsd = rowsd(d11 - d`B'1)

// Calculate lower bounds on bootstrap CIs
gen LCI = d + invnormal(0.025)*rsd

// Calculate upper bounds on bootstrap CIs
gen UCI = d + invnormal(0.975)*rsd

grstyle init
grstyle set plain, nogrid box noextend
grstyle graphsize x 14
grstyle graphsize y 8
grstyle color background white

// Make the plot with the results.
twoway  rarea UCI LCI time, color("dknavy*0.2") || scatter d time, 		///
	color("dknavy") m(circle) recast(connected) ytitle("Percentage change") ///
	legend(off) xscale(lcolor(black) lwidth(none) line)			///
	ylabel(-2(2)12) yline(0, lc(black) lp(dash) lw(medthin)) 		///
	yline(6, lc("dknavy") lp(dash) lwidth(medthin))  			///
	text(5.5 61 "100% Pass-Through", color("dknavy") placement(e)) 	///
	tlabel(61 " " 62 "-4" 63 "-3" 64 "-2" 65 "-1" 66 "0" 67 "1" 68 "2"  	///
	69 "3" 70 "4" 71 "5" 72 "6" 73 "7" 74 "8" 75 "9" 76 "10" 77 "11"        ///
	78 "12" 79 "13" 80 "14" 81 "15" 82 "16" 83 " ") 			///
	graphregion(fcolor(white) lcolor(black)) 				///
	yscale(lcolor(black) lwidth(none) line) yscale(lcolor(black)  		///
	lwidth(none) line) xtitle("5-day Periods to Event") 			///
	xline(66, lcolor("gray*0.5") lpattern(solid) lwidth(medthin)) 		///
	text(11.5 66.1 "Announcement", color("gray") placement(e)) 		///
	xline(80, lcolor("gray*0.5") lpattern(solid) lwidth(medthin)) 		///
	text(11.5 77.8 "Reversal", color("gray") placement(e))		

// Export the graph.
graph export "${results_figures}/figure_c2b.png", as(png) replace width(2400) height(1372)

restore

*********************************************************

erase "${path_work}/auxiliar/imputation_0_only_food.dta"

graph close _all

*********************************************************
